{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "aggregate-logan",
   "metadata": {},
   "source": [
    "## Import Packages "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "streaming-console",
   "metadata": {},
   "outputs": [],
   "source": [
    "import dataset\n",
    "import tree as miptree\n",
    "from sklearn import tree\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "legitimate-photographer",
   "metadata": {},
   "source": [
    "## Set Args"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "relative-finder",
   "metadata": {},
   "outputs": [],
   "source": [
    "timelimit = 600\n",
    "seed = 42\n",
    "d = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "separated-glance",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_ratio = 0.5\n",
    "val_ratio = 0.25\n",
    "test_ratio = 0.25"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "appreciated-thompson",
   "metadata": {},
   "source": [
    "## Load Data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "recent-employee",
   "metadata": {},
   "outputs": [],
   "source": [
    "x, y = dataset.loadData('house-votes-84')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "urban-pattern",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_enc = dataset.oneHot(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "japanese-packing",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1-train_ratio, random_state=seed)\n",
    "x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, \n",
    "                                                test_size=test_ratio/(test_ratio+val_ratio), random_state=seed)\n",
    "x_train_enc, x_test_enc, y_train, y_test = train_test_split(x_enc, y, test_size=1-train_ratio, random_state=seed)\n",
    "x_val_enc, x_test_enc, y_val, y_test = train_test_split(x_test_enc, y_test, \n",
    "                                                        test_size=test_ratio/(test_ratio+val_ratio), random_state=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "raised-bullet",
   "metadata": {},
   "source": [
    "## Optimal Classification Tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ready-lawrence",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data include 116 instances, 16 features.\n",
      "Academic license - for non-commercial use only - expires 2021-06-13\n",
      "Using license file C:\\Users\\Apocrypse\\gurobi.lic\n",
      "Parameter outputFlag unchanged\n",
      "   Value: 1  Min: 0  Max: 1  Default: 1\n",
      "Parameter LogToConsole unchanged\n",
      "   Value: 1  Min: 0  Max: 1  Default: 1\n",
      "Changed value of parameter timelimit to 600.0\n",
      "   Prev: inf  Min: 0.0  Max: inf  Default: inf\n",
      "Parameter threads unchanged\n",
      "   Value: 0  Min: 0  Max: 1024  Default: 0\n",
      "Gurobi Optimizer version 9.1.1 build v9.1.1rc0 (win64)\n",
      "Thread count: 4 physical cores, 8 logical processors, using up to 8 threads\n",
      "Optimize a model with 1552 rows, 546 columns and 16105 nonzeros\n",
      "Model fingerprint: 0xfcf2b093\n",
      "Variable types: 19 continuous, 527 integer (527 binary)\n",
      "Coefficient statistics:\n",
      "  Matrix range     [1e+00, 1e+02]\n",
      "  Objective range  [1e-02, 2e-02]\n",
      "  Bounds range     [1e+00, 1e+00]\n",
      "  RHS range        [1e+00, 1e+02]\n",
      "\n",
      "User MIP start produced solution with objective 0.0815385 (0.01s)\n",
      "Loaded user MIP start with objective 0.0815385\n",
      "\n",
      "Presolve removed 8 rows and 4 columns\n",
      "Presolve time: 0.03s\n",
      "Presolved: 1544 rows, 542 columns, 13171 nonzeros\n",
      "Variable types: 0 continuous, 542 integer (530 binary)\n",
      "\n",
      "Root relaxation: objective 2.633356e-03, 709 iterations, 0.02 seconds\n",
      "\n",
      "    Nodes    |    Current Node    |     Objective Bounds      |     Work\n",
      " Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time\n",
      "\n",
      "     0     0    0.01000    0  241    0.08154    0.01000  87.7%     -    0s\n",
      "     0     0    0.01000    0  202    0.08154    0.01000  87.7%     -    0s\n",
      "     0     0    0.01000    0  204    0.08154    0.01000  87.7%     -    0s\n",
      "     0     0    0.01000    0  152    0.08154    0.01000  87.7%     -    0s\n",
      "H    0     0                       0.0715385    0.01000  86.0%     -    0s\n",
      "     0     0    0.01000    0  150    0.07154    0.01000  86.0%     -    0s\n",
      "     0     0    0.01000    0  123    0.07154    0.01000  86.0%     -    0s\n",
      "     0     0    0.01000    0  234    0.07154    0.01000  86.0%     -    0s\n",
      "     0     0    0.01000    0  209    0.07154    0.01000  86.0%     -    1s\n",
      "     0     0    0.01000    0  233    0.07154    0.01000  86.0%     -    1s\n",
      "     0     0    0.01000    0  187    0.07154    0.01000  86.0%     -    1s\n",
      "     0     0    0.01053    0  230    0.07154    0.01053  85.3%     -    1s\n",
      "     0     0    0.01113    0  241    0.07154    0.01113  84.4%     -    1s\n",
      "     0     0    0.01113    0  247    0.07154    0.01113  84.4%     -    1s\n",
      "     0     0    0.01113    0  229    0.07154    0.01113  84.4%     -    1s\n",
      "     0     0    0.01113    0  229    0.07154    0.01113  84.4%     -    1s\n",
      "     0     2    0.01113    0  229    0.07154    0.01113  84.4%     -    1s\n",
      "\n",
      "Cutting planes:\n",
      "  Gomory: 4\n",
      "  Cover: 395\n",
      "  Clique: 13\n",
      "  MIR: 29\n",
      "  StrongCG: 2\n",
      "  Flow cover: 30\n",
      "  GUB cover: 49\n",
      "  Inf proof: 7\n",
      "  Zero half: 11\n",
      "\n",
      "Explored 1874 nodes (94559 simplex iterations) in 4.81 seconds\n",
      "Thread count was 8 (of 8 available processors)\n",
      "\n",
      "Solution count 2: 0.0715385 0.0815385 \n",
      "\n",
      "Optimal solution found (tolerance 1.00e-04)\n",
      "Best objective 7.153846153846e-02, best bound 7.153846153846e-02, gap 0.0000%\n"
     ]
    }
   ],
   "source": [
    "octree = miptree.optimalDecisionTreeClassifier(max_depth=d, min_samples_split=0, alpha=0.01, timelimit=timelimit)\n",
    "octree.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "prepared-forest",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train_pred = octree.predict(x_train)\n",
    "accuracy_score(y_train, y_train_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "strategic-midnight",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_pred = octree.predict(x_test)\n",
    "accuracy_score(y_test, y_test_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "involved-powell",
   "metadata": {},
   "source": [
    "## Optimal Classification Tree with Binary Encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "overall-skating",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data include 116 instances, 16 features.\n",
      "Parameter outputFlag unchanged\n",
      "   Value: 1  Min: 0  Max: 1  Default: 1\n",
      "Parameter LogToConsole unchanged\n",
      "   Value: 1  Min: 0  Max: 1  Default: 1\n",
      "Changed value of parameter timelimit to 600.0\n",
      "   Prev: inf  Min: 0.0  Max: inf  Default: inf\n",
      "Parameter threads unchanged\n",
      "   Value: 0  Min: 0  Max: 1024  Default: 0\n",
      "Gurobi Optimizer version 9.1.1 build v9.1.1rc0 (win64)\n",
      "Thread count: 4 physical cores, 8 logical processors, using up to 8 threads\n",
      "Optimize a model with 179 rows, 528 columns and 8472 nonzeros\n",
      "Model fingerprint: 0x441ee230\n",
      "Variable types: 472 continuous, 56 integer (56 binary)\n",
      "Coefficient statistics:\n",
      "  Matrix range     [1e+00, 1e+02]\n",
      "  Objective range  [2e-02, 2e-02]\n",
      "  Bounds range     [1e+00, 1e+00]\n",
      "  RHS range        [1e+00, 1e+02]\n",
      "\n",
      "User MIP start produced solution with objective 0.0615385 (0.02s)\n",
      "Loaded user MIP start with objective 0.0615385\n",
      "\n",
      "Presolve removed 27 rows and 99 columns\n",
      "Presolve time: 0.03s\n",
      "Presolved: 152 rows, 429 columns, 7187 nonzeros\n",
      "Variable types: 377 continuous, 52 integer (52 binary)\n",
      "\n",
      "Root relaxation: objective 0.000000e+00, 14 iterations, 0.00 seconds\n",
      "\n",
      "    Nodes    |    Current Node    |     Objective Bounds      |     Work\n",
      " Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time\n",
      "\n",
      "     0     0    0.00000    0    9    0.06154    0.00000   100%     -    0s\n",
      "     0     0    0.00000    0   10    0.06154    0.00000   100%     -    0s\n",
      "     0     0    0.00000    0    2    0.06154    0.00000   100%     -    0s\n",
      "     0     2    0.00000    0    2    0.06154    0.00000   100%     -    0s\n",
      "\n",
      "Cutting planes:\n",
      "  Gomory: 2\n",
      "  Implied bound: 362\n",
      "  MIR: 6\n",
      "  Relax-and-lift: 14\n",
      "\n",
      "Explored 990 nodes (12923 simplex iterations) in 0.51 seconds\n",
      "Thread count was 8 (of 8 available processors)\n",
      "\n",
      "Solution count 1: 0.0615385 \n",
      "\n",
      "Optimal solution found (tolerance 1.00e-04)\n",
      "Best objective 6.153846153846e-02, best bound 6.153846153846e-02, gap 0.0000%\n"
     ]
    }
   ],
   "source": [
    "boct = miptree.binOptimalDecisionTreeClassifier(max_depth=d, min_samples_split=0, timelimit=timelimit)\n",
    "boct.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "incorporate-cisco",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train_pred = boct.predict(x_train)\n",
    "accuracy_score(y_train, y_train_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fresh-curve",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_pred = boct.predict(x_test)\n",
    "accuracy_score(y_test, y_test_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "roman-raise",
   "metadata": {},
   "source": [
    "## Max Flow Classification Tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "lined-links",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data include 116 instances, 16 features.\n",
      "Parameter outputFlag unchanged\n",
      "   Value: 1  Min: 0  Max: 1  Default: 1\n",
      "Parameter LogToConsole unchanged\n",
      "   Value: 1  Min: 0  Max: 1  Default: 1\n",
      "Changed value of parameter timelimit to 600.0\n",
      "   Prev: inf  Min: 0.0  Max: inf  Default: inf\n",
      "Parameter threads unchanged\n",
      "   Value: 0  Min: 0  Max: 1024  Default: 0\n",
      "Changed value of parameter lazyConstraints to 1\n",
      "   Prev: 0  Min: 0  Max: 1  Default: 0\n",
      "Gurobi Optimizer version 9.1.1 build v9.1.1rc0 (win64)\n",
      "Thread count: 4 physical cores, 8 logical processors, using up to 8 threads\n",
      "Optimize a model with 14 rows, 185 columns and 86 nonzeros\n",
      "Model fingerprint: 0xacf718a6\n",
      "Variable types: 116 continuous, 69 integer (69 binary)\n",
      "Coefficient statistics:\n",
      "  Matrix range     [1e+00, 1e+00]\n",
      "  Objective range  [2e-02, 2e-02]\n",
      "  Bounds range     [1e+00, 1e+00]\n",
      "  RHS range        [1e+00, 1e+00]\n",
      "\n",
      "User MIP start did not produce a new incumbent solution\n",
      "\n",
      "Presolve removed 6 rows and 6 columns\n",
      "Presolve time: 0.00s\n",
      "Presolved: 8 rows, 179 columns, 76 nonzeros\n",
      "Variable types: 116 continuous, 63 integer (63 binary)\n",
      "\n",
      "Root relaxation: objective 1.784615e+00, 5 iterations, 0.00 seconds\n",
      "Another try with MIP start\n",
      "\n",
      "    Nodes    |    Current Node    |     Objective Bounds      |     Work\n",
      " Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time\n",
      "\n",
      "H    0     0                       1.7230769    1.78462  3.57%     -    0s\n",
      "     0     0    1.78462    0    6    1.72308    1.78462  3.57%     -    0s\n",
      "     0     0    1.78462    0    6    1.72308    1.78462  3.57%     -    0s\n",
      "     0     2    1.78462    0    8    1.72308    1.78462  3.57%     -    0s\n",
      "\n",
      "Cutting planes:\n",
      "  Lazy constraints: 310\n",
      "\n",
      "Explored 457 nodes (6550 simplex iterations) in 0.51 seconds\n",
      "Thread count was 8 (of 8 available processors)\n",
      "\n",
      "Solution count 1: 1.72308 \n",
      "\n",
      "Optimal solution found (tolerance 1.00e-04)\n",
      "Best objective 1.723076923077e+00, best bound 1.723076923077e+00, gap 0.0000%\n",
      "\n",
      "User-callback calls 1041, time in user-callback 0.23 sec\n"
     ]
    }
   ],
   "source": [
    "mfoct = miptree.maxFlowOptimalDecisionTreeClassifier(max_depth=d, alpha=0, timelimit=timelimit)\n",
    "mfoct.fit(x_train_enc, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "established-electricity",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train_pred = mfoct.predict(x_train_enc)\n",
    "accuracy_score(y_train, y_train_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "australian-notebook",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_pred = mfoct.predict(x_test_enc)\n",
    "accuracy_score(y_test, y_test_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "boxed-museum",
   "metadata": {},
   "source": [
    "## SK-Learn Decision Tree "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "parallel-terror",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(max_depth=2)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = tree.DecisionTreeClassifier(max_depth=d)\n",
    "clf.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "adjusted-ukraine",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train_pred = clf.predict(x_train)\n",
    "accuracy_score(y_train, y_train_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "residential-serbia",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9655172413793104"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_pred = clf.predict(x_test)\n",
    "accuracy_score(y_test, y_test_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "combined-hudson",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
